{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "Feature selection on L1 norms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import sklearn.datasets as ds\n",
    "diabetes = ds.load_diabetes()\n",
    "\n",
    "X = diabetes.data\n",
    "y = diabetes.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LinearRegression\n",
    "lr = LinearRegression()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import make_scorer, mean_squared_error\n",
    "from sklearn.model_selection import cross_val_score,ShuffleSplit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-3053.393446308266"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "shuff = ShuffleSplit(n_splits=10, test_size=0.25, random_state=0)\n",
    "score_before = cross_val_score(lr,X,y,cv=shuff,scoring=make_scorer(mean_squared_error,greater_is_better=False)).mean()\n",
    "\n",
    "score_before"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([  -0.        , -226.2375274 ,  526.85738059,  314.44026013,\n",
       "       -196.92164002,    1.48742026, -151.78054083,  106.52846989,\n",
       "        530.58541123,   64.50588257])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.linear_model import LassoCV\n",
    "\n",
    "lasso_cv = LassoCV()\n",
    "lasso_cv.fit(X,y)\n",
    "lasso_cv.coef_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int64)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "columns = np.arange(X.shape[1])[lasso_cv.coef_ != 0]\n",
    "columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-3033.5012859289677"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "score_afterwards = cross_val_score(lr,X[:,columns],y,cv=shuff, scoring=make_scorer(mean_squared_error,greater_is_better=False)).mean()\n",
    "score_afterwards"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X, y = ds.make_regression(noise=5)\n",
    "shuff = ShuffleSplit(n_splits=10, test_size=0.25, random_state=0)\n",
    "\n",
    "score_before = cross_val_score(lr,X,y,cv=shuff, scoring=make_scorer(mean_squared_error,greater_is_better=False)).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Score before: -10065.7000392\n",
      "Score after:  -30.1665895048\n"
     ]
    }
   ],
   "source": [
    "lasso_cv = LassoCV()\n",
    "lasso_cv.fit(X,y)\n",
    "\n",
    "columns = np.arange(X.shape[1])[lasso_cv.coef_ != 0]\n",
    "score_afterwards = cross_val_score(lr,X[:,columns],y,cv=shuff, scoring=make_scorer(mean_squared_error,greater_is_better=False)).mean()\n",
    "print \"Score before:\",score_before\n",
    "print \"Score after: \",score_afterwards"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
